rm(list=ls())
library(seededlda) #1.4.2
library(qwraps2) #0.6.1
library(quanteda) #4.3.0
library(tidyverse) #2.0.0
library(here) #1.0.1
library(text2vec) #0.6.4
## As highlighted  in the paper,   I did selective stemming of the tokenized corpus 
#for the embeddings

here()
load("bootstrappedgloveperiod1.RData")
load("bootstrappedgloveperiod2.RData")
load("gloverobustnesscheckcosine.RData")

 ##now  a function to get the vector for each vector for period 1 and 2
 #first period 1
 # ————————————————————————————————————————————————————————————
 calc_word_period1  <- function(
    target_word  = "millet",
    boot_samples = bootstrapped_samples_period1 ,
    num_bootstraps = length(bootstrapped_samples_period1 )
 ) {
   
   ## 1. pull one-word vectors --------------------------------------------------
   rmminimal_list_period1  <- vector("list", num_bootstraps)
   
   for (i in seq_len(num_bootstraps)) {
     word_vectorsminimal_period1  <- boot_samples[[i]]
     rmminimal_period1            <- word_vectorsminimal_period1 [target_word, , drop = FALSE]
     rmminimal_list_period1 [[i]] <- rmminimal_period1 
   }
   
   ## 2. cosine similarities ----------------------------------------------------
   cos_sim_list_period1  <- vector("list", num_bootstraps)
   
   for (i in seq_len(num_bootstraps)) {
     word_vectorsminimal_period1  <- boot_samples[[i]]
     rmminimal_period1            <- word_vectorsminimal_period1 [target_word, , drop = FALSE]
     cos_sim_period1              <- sim2(x = word_vectorsminimal_period1 ,
                                          y = rmminimal_period1 ,
                                          method = "cosine",
                                          norm   = "l2")
     cos_sim_list_period1 [[i]]   <- cos_sim_period1 
   }
   
   ## 3. aggregate bootstrap results -------------------------------------------
   result <- map_dfr(cos_sim_list_period1 , ~ as_tibble(.x) %>%
                       mutate(color = row.names(.x), .before = 1) %>%
                       mutate(rn = row_number())) %>%
     pivot_longer(cols = all_of(target_word)) %>% 
     group_by(color, rn) %>% 
     reframe(value = mean(value)) %>% 
     group_by(color) %>% 
     reframe(out = list(mean_ci(value))) %>% 
     unnest_wider(out)
   
   ## 4. export with a dynamic name (e.g. "wheatmean_period1 ") ------------------
   assign(paste0(target_word, "mean_period1 "), result, envir = parent.frame())
   
   return(result)
 }
 
 #then period 2
 calc_word_period2  <- function(
    target_word  = "millet",
    boot_samples = bootstrapped_samples_period2 ,
    num_bootstraps = length(bootstrapped_samples_period2 )
 ) {
   
   ## 1. pull one-word vectors --------------------------------------------------
   rmminimal_list_period2  <- vector("list", num_bootstraps)
   
   for (i in seq_len(num_bootstraps)) {
     word_vectorsminimal_period2  <- boot_samples[[i]]
     rmminimal_period2            <- word_vectorsminimal_period2 [target_word, , drop = FALSE]
     rmminimal_list_period2 [[i]] <- rmminimal_period2 
   }
   
   ## 2. cosine similarities ----------------------------------------------------
   cos_sim_list_period2  <- vector("list", num_bootstraps)
   
   for (i in seq_len(num_bootstraps)) {
     word_vectorsminimal_period2  <- boot_samples[[i]]
     rmminimal_period2            <- word_vectorsminimal_period2 [target_word, , drop = FALSE]
     cos_sim_period2              <- sim2(x = word_vectorsminimal_period2 ,
                                         y = rmminimal_period2 ,
                                         method = "cosine",
                                         norm   = "l2")
     cos_sim_list_period2 [[i]]   <- cos_sim_period2 
   }
   
   ## 3. aggregate bootstrap results -------------------------------------------
   result <- map_dfr(cos_sim_list_period2 , ~ as_tibble(.x) %>%
                       mutate(color = row.names(.x), .before = 1) %>%
                       mutate(rn = row_number())) %>%
     pivot_longer(cols = all_of(target_word)) %>% 
     group_by(color, rn) %>% 
     reframe(value = mean(value)) %>% 
     group_by(color) %>% 
     reframe(out = list(mean_ci(value))) %>% 
     unnest_wider(out)
   
   ## 4. export with a dynamic name (e.g. "wheatmean_period2 ") ------------------
   assign(paste0(target_word, "mean_period2 "), result, envir = parent.frame())
   
   return(result)
 }
 
 num_bootstraps <- 50
 
 
millet_period1_vector <- calc_word_period1("millet",boot_samples = bootstrapped_samples_period1,num_bootstraps = num_bootstraps)
 
şehit_period1_vector <- calc_word_period1("şehit",boot_samples = bootstrapped_samples_period1,num_bootstraps = num_bootstraps)
 
ülke_period1_vector <- calc_word_period1("ülke",boot_samples = bootstrapped_samples_period1,num_bootstraps = num_bootstraps)
 
tarih_period1_vector <- calc_word_period1("tarih",boot_samples = bootstrapped_samples_period1,num_bootstraps = num_bootstraps)

türk_period1_vector <- calc_word_period1("türk",boot_samples = bootstrapped_samples_period1,num_bootstraps = num_bootstraps)


millet_period2_vector <- calc_word_period2("millet",boot_samples = bootstrapped_samples_period2,num_bootstraps = num_bootstraps)

şehit_period2_vector <- calc_word_period2("şehit",boot_samples = bootstrapped_samples_period2,num_bootstraps = num_bootstraps)

ülke_period2_vector <- calc_word_period2("ülke",boot_samples = bootstrapped_samples_period2,num_bootstraps = num_bootstraps)

tarih_period2_vector <- calc_word_period2("tarih",boot_samples = bootstrapped_samples_period2,num_bootstraps = num_bootstraps)

türk_period2_vector <- calc_word_period2("türk",boot_samples = bootstrapped_samples_period2,num_bootstraps = num_bootstraps)

names_p1 <- c("words", "meanbefore", "lclbefore", "uclbefore")
names_p2 <- c("words", "meanafter",  "lclafter",  "uclafter")

millet_period1_vector <- setNames(millet_period1_vector, names_p1)
şehit_period1_vector  <- setNames(şehit_period1_vector,  names_p1)
ülke_period1_vector   <- setNames(ülke_period1_vector,   names_p1)
tarih_period1_vector  <- setNames(tarih_period1_vector,  names_p1)
türk_period1_vector   <- setNames(türk_period1_vector,   names_p1)

millet_period2_vector <- setNames(millet_period2_vector, names_p2)
şehit_period2_vector  <- setNames(şehit_period2_vector,  names_p2)
ülke_period2_vector   <- setNames(ülke_period2_vector,   names_p2)
tarih_period2_vector  <- setNames(tarih_period2_vector,  names_p2)
türk_period2_vector   <- setNames(türk_period2_vector,   names_p2)

## defining dictionaries for the seed words

# Opposition dictionary
oppositiondictionary <- dictionary(
  list(
    Opposition = c("chp", "hdp", "bay", "muhalefet")
  )
)

# West dictionary
westdictionary <- dictionary(
  list(
    West = c("batı", "ab", "abd")
  )
)

# Economy dictionary
economydictionary <- dictionary(
  list(
    Economy = c("ekonomi", "finans", "merkez_bankası")
  )
)

# Terror dictionary
terrordictionary <- dictionary(
  list(
    Terror = c("terör")
  )
)


get_target_average_before <- function(data, text_col, dicts) {
  
  
  base_dfm <- dfm(tokens(data[[text_col]]))
  
  results <- lapply(names(dicts), function(dict_name) {
    dfm_dict <- dfm_lookup(
      base_dfm,
      dictionary = dicts[[dict_name]],
      valuetype = "fixed",            # IMPORTANT!
      case_insensitive = FALSE
    )
    df_counts <- convert(dfm_dict, to = "data.frame") %>%
      rename(!!dict_name := matches("^[^doc_id]")) %>%
      select(!!dict_name)
    
    tmp <- bind_cols(data, df_counts)
    
    tmp %>%
      filter(.data[[dict_name]] > 0) %>%
      summarise(
        meanbefore = mean(meanbefore),
        lclbefore  = mean(lclbefore, na.rm = TRUE),
        uclbefore  = mean(uclbefore, na.rm = TRUE),
        dict       = dict_name
      )
  })
  
  bind_rows(results)
}
 
get_target_average_after <- function(data, text_col, dicts) {
  # build tokens → dfm once
  base_dfm <- dfm(tokens(data[[text_col]]))
  
  # for each dictionary, lookup, bind the counts, filter & summarise
  results <- lapply(names(dicts), function(dict_name) {
    dfm_dict <- dfm_lookup(base_dfm, dictionary = dicts[[dict_name]])
    df_counts <- convert(dfm_dict, to = "data.frame") %>%
      # the count column comes in as name of your dict
      rename(!!dict_name := matches("^[^doc_id]")) %>%
      select(!!dict_name)
    
    # bind counts to your original data
    tmp <- bind_cols(data, df_counts)
    
    # only rows where the dict fires > 0
    tmp %>%
      filter(.data[[dict_name]] > 0) %>%
      summarise(
        meanafter = mean(meanafter),
        lclafter  = mean(lclafter, na.rm = TRUE),
        uclafter  = mean(uclafter, na.rm = TRUE),
        dict     = dict_name
      )
  })
  
  # stack into one data.frame
  bind_rows(results)
}
 
 
seeddictionaries <- list(
  Opposition  = oppositiondictionary,
  Terror      = terrordictionary,
  Economy    = economydictionary,
  West       = westdictionary
)




# Define unwanted tokens explicitly
unwanted <- c("-ekonomi", "konuştuk.ekonomik", "makroekonomik")

# Removing words that causes issues with extraction  in the wrapper function
millet_period1_vector <- millet_period1_vector[!(millet_period1_vector$words %in% unwanted), ]

ülke_period1_vector <- ülke_period1_vector[!(ülke_period1_vector$words %in% unwanted), ]

tarih_period1_vector <- tarih_period1_vector[!(tarih_period1_vector$words %in% unwanted), ]

şehit_period1_vector <- şehit_period1_vector[!(şehit_period1_vector$words %in% unwanted), ]

türk_period1_vector <- türk_period1_vector[!(türk_period1_vector$words %in% unwanted), ]



milletperiod1 <- get_target_average_before(
  data     = millet_period1_vector,
  text_col = "words",
  dicts    = seeddictionaries
)


türkperiod1 <- get_target_average_before(
  data     = türk_period1_vector,
  text_col = "words",
  dicts    = seeddictionaries
)
 


ülkeperiod1 <- get_target_average_before(
  data     = ülke_period1_vector,
  text_col = "words",
  dicts    = seeddictionaries
)

tarihperiod1 <- get_target_average_before(
  data     = tarih_period1_vector,
  text_col = "words",
  dicts    = seeddictionaries
)

şehitperiod1 <- get_target_average_before(
  data     = şehit_period1_vector,
  text_col = "words",
  dicts    = seeddictionaries
)
 

milletperiod2 <- get_target_average_after(
  data     = millet_period2_vector,
  text_col = "words",
  dicts    = seeddictionaries
)


türkperiod2 <- get_target_average_after(
  data     = türk_period2_vector,
  text_col = "words",
  dicts    = seeddictionaries
)



ülkeperiod2 <- get_target_average_after(
  data     = ülke_period2_vector,
  text_col = "words",
  dicts    = seeddictionaries
)

tarihperiod2 <- get_target_average_after(
  data     = tarih_period2_vector,
  text_col = "words",
  dicts    = seeddictionaries
)

şehitperiod2 <- get_target_average_after(
  data     = şehit_period2_vector,
  text_col = "words",
  dicts    = seeddictionaries
)



türkperiod1$period <- "before"

## period 1 (“before”)
milletperiod1$period <- "before"
türkperiod1$period   <- "before"
ülkeperiod1$period    <- "before"
tarihperiod1$period   <- "before"
şehitperiod1$period   <- "before"

## period 2 (“after”)
milletperiod2$period <- "after"
türkperiod2$period   <- "after"
ülkeperiod2$period    <- "after"
tarihperiod2$period   <- "after"
şehitperiod2$period   <- "after" 
 

new_names <- c("mean", "lower", "after", "group", "period")

all_dfs <- c(
  "milletperiod1", "türkperiod1",   "ülkeperiod1",  "tarihperiod1",  "şehitperiod1",
  "milletperiod2", "türkperiod2",   "ülkeperiod2",  "tarihperiod2",  "şehitperiod2"
)

# loop, rename, and re-assign
for (nm in all_dfs) {
  tmp <- get(nm)
  colnames(tmp) <- new_names
  assign(nm, tmp, envir = .GlobalEnv)
}
 


milletcombined <- rbind(milletperiod1, milletperiod2)
türkcombined   <- rbind(türkperiod1,   türkperiod2)
ülkecombined    <- rbind(ülkeperiod1,   ülkeperiod2)
tarihcombined   <- rbind(tarihperiod1,  tarihperiod2)
şehitcombined   <- rbind(şehitperiod1,  şehitperiod2)



milletcombined$word <- "millet"
türkcombined$word <- "türk"
ülkecombined$word <- "ülke"
tarihcombined$word <- "tarih"
şehitcombined$word <- "şehit"




# create a list of the two dataframes
list_of_data <- list(milletcombined, tarihcombined,türkcombined,ülkecombined,şehitcombined)
list_of_data
# subset the data frames to only include numeric columns
list_numeric <- lapply(list_of_data, function(x) x[sapply(x, is.numeric)])

list_numeric 


mean_data <- Reduce(`+`, list_numeric) / length(list_numeric )
mean_data



###now adding the relevant columns
# 1) initialize the new columns
mean_data$group <- NA_character_
mean_data$Period <- NA_character_

# 2) manually assign the group labels by row number
mean_data$group[c(1, 8)] <- "(A) West"        # rows 1 & 8 → A West
mean_data$group[c(2, 3)] <- "(C) Economy"     # rows 2 & 3 → C Economy
mean_data$group[c(4, 7)] <- "(B) Opposition"  # rows 4 & 7 → B Opposition
mean_data$group[5:6]     <- "(D) Terror"      # rows 5 & 6 → D Terror

# 3) assign Period: first 4 are “Before…”, last 4 “After…”
mean_data$Period[1:4] <- "Before the Coup"
mean_data$Period[5:8] <- "After the Coup"

# take a look
print(mean_data)

colnames(mean_data) <- c("mean","lower","upper","group","Period")


mean_data$Period <- factor(mean_data$Period, levels = c("Before the Coup", "After the Coup"))



mean_new <- mean_data



mean_new$grouptrue <- c("(A) West", "(D) Terror","(C) Economy","(B) Opposition","(A) West","(D) Terror","(C) Economy","(B) Opposition")

# Plotting

save(mean_new, file = "cosinesimilaritydataforfigure4.RData")


fig4 <- ggplot(mean_new, aes(x = as.factor(grouptrue), y = mean, group = interaction(grouptrue, Period), color = Period)) +
  # Decrease the width to the size of each facet label
  geom_point(size = 3, alpha = 0.5, position = position_dodge(width = 0.7)) +
  geom_linerange(aes(ymin = lower, ymax = upper),
                 position = position_dodge(width = 0.7),
                 size = 0.7,
                 show.legend = FALSE) +  # hide legend for geom_linerange
  facet_wrap(~grouptrue, scales = "free_x", ncol = 4) +  # Decrease height and offset overlapping parts
  labs(x = "Issue", y = "Semantic Association with Religious Nationalism Terms") +
  scale_color_manual(values = c("Before the Coup" = "black", "After the Coup" = "darkred")) +  # Set line colors to black and dark red
  theme_minimal() +  # Remove gray background
  theme(strip.placement = "outside",  # Place strip text outside the plot area
        strip.background = element_blank(),  # Remove strip background
        strip.text.x = element_blank())  # Remove strip text from x-axis

fig4

ggsave(fig4, filename = "figure4.pdf", width = 6, height = 5)

## The robustness check model in Figure A.1 
###was  trained in exactly the way
#### The only difference is that  Figure A.1
#### divide ALL of the data to 6-month intervals
#### and trains word embeddings
### below is figure A.1


colors <- c("Before the coup" = "#ff7f0e", "After the coup" = "#1f77b4")

figa.1 <- ggplot(robustmodelglove , aes(x = as.factor(Perioddate), y = mean, ymin = lower, ymax = upper,
                                        group = Period, color = Period)) +
  geom_point(position = position_dodge(width = 0.5), size = 2) +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0.2) +
  scale_color_manual(values = colors) +
  labs(x = "Period", y = "Semantic Association with Religious Nationalism Terms") +
  facet_wrap(~ group, scales = "free_y", ncol = 1) +
  theme_minimal()

figa.1

ggsave(figa.1, filename = "figurea.1.pdf", width = 8, height = 8)




